Setup

root = "./"
# Import functions
source(file.path(root, "./general_functions.R"))
import_parameters(params)
## **** __Utilized Cores__ **** = 4$subsetGenes
## [1] "protein_coding"
## 
## $subsetCells
## [1] "400"
## 
## $resolution
## [1] "0.2"
## 
## $resultsPath
## [1] "Results/subsetGenes-protein_coding__subsetCells-400__Resolution-0.2__perplexity-30__nCores-4"
## 
## $nCores
## [1] 4
## 
## $perplexity
## [1] "30"
load(file.path(resultsPath, "scRNAseq_results.RData"))

__Results/subsetGenes-protein_coding__subsetCells-400__Resolution-0.2__perplexity-30nCores-4

Load Libraries

library(Seurat)
library(dplyr)
library(gridExtra)
library(knitr) 
library(plotly)
library(ggplot2)
library(viridis)
library(reshape2)
library(shiny) 
library(ggrepel)
library(DT) 
library(ComplexHeatmap); #BiocManager::install("ComplexHeatmap") 
  
## Install Bioconductor
#  if (!requireNamespace("BiocManager"))
#     install.packages("BiocManager") 
library(biomaRt) # BiocManager::install(c("biomaRt"))
library(DESeq2) # BiocManager::install(c("DESeq2"))
library(enrichR) #BiocManager::install("enrichR")

library(monocle) #BiocManager::install("monocle")
# BiocManager::install("DelayedMatrixStats")
# BiocManager::install("org.Mm.eg.db") 
library(org.Hs.eg.db)
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett") 
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.2
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] splines   stats4    parallel  grid      stats     graphics  grDevices
##  [8] utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DESeq2_1.22.2               SummarizedExperiment_1.12.0
##  [3] DelayedArray_0.8.0          BiocParallel_1.16.5        
##  [5] matrixStats_0.54.0          GenomicRanges_1.34.0       
##  [7] GenomeInfoDb_1.18.1         RColorBrewer_1.1-2         
##  [9] bindrcpp_0.2.2              garnett_0.1.1              
## [11] org.Hs.eg.db_3.7.0          AnnotationDbi_1.44.0       
## [13] IRanges_2.16.0              S4Vectors_0.20.1           
## [15] monocle_2.10.1              DDRTree_0.1.5              
## [17] irlba_2.3.3                 VGAM_1.0-6                 
## [19] Biobase_2.42.0              BiocGenerics_0.28.0        
## [21] enrichR_1.0                 biomaRt_2.38.0             
## [23] ComplexHeatmap_1.20.0       ggrepel_0.8.0              
## [25] reshape2_1.4.3              viridis_0.5.1              
## [27] viridisLite_0.3.0           DT_0.5.1                   
## [29] shiny_1.2.0                 plotly_4.8.0               
## [31] knitr_1.21                  gridExtra_2.3              
## [33] dplyr_0.7.8                 Seurat_2.3.4               
## [35] Matrix_1.2-15               cowplot_0.9.4              
## [37] ggplot2_3.1.0              
## 
## loaded via a namespace (and not attached):
##   [1] reticulate_1.10        R.utils_2.7.0          tidyselect_0.2.5      
##   [4] RSQLite_2.1.1          htmlwidgets_1.3        combinat_0.0-8        
##   [7] trimcluster_0.1-2.1    docopt_0.6.1           Rtsne_0.15            
##  [10] munsell_0.5.0          codetools_0.2-16       ica_1.0-2             
##  [13] withr_2.1.2            colorspace_1.4-0       fastICA_1.2-1         
##  [16] rstudioapi_0.9.0       ROCR_1.0-7             robustbase_0.93-3     
##  [19] dtw_1.20-1             gbRd_0.4-11            Rdpack_0.10-1         
##  [22] labeling_0.3           lars_1.2               slam_0.1-44           
##  [25] GenomeInfoDbData_1.2.0 bit64_0.9-7            pheatmap_1.0.12       
##  [28] xfun_0.4               diptest_0.75-7         R6_2.3.0              
##  [31] locfit_1.5-9.1         hdf5r_1.0.1            flexmix_2.3-14        
##  [34] bitops_1.0-6           assertthat_0.2.0       promises_1.0.1        
##  [37] SDMTools_1.1-221       scales_1.0.0           nnet_7.3-12           
##  [40] gtable_0.2.0           npsurv_0.4-0           rlang_0.3.1           
##  [43] genefilter_1.64.0      GlobalOptions_0.1.0    lazyeval_0.2.1        
##  [46] acepack_1.4.1          checkmate_1.9.1        yaml_2.2.0            
##  [49] crosstalk_1.0.0        backports_1.1.3        httpuv_1.4.5.1        
##  [52] Hmisc_4.2-0            tools_3.5.1            gplots_3.0.1.1        
##  [55] proxy_0.4-22           ggridges_0.5.1         Rcpp_1.0.0            
##  [58] plyr_1.8.4             zlibbioc_1.28.0        base64enc_0.1-3       
##  [61] progress_1.2.0         purrr_0.3.0            RCurl_1.95-4.11       
##  [64] densityClust_0.3       prettyunits_1.0.2      rpart_4.1-13          
##  [67] pbapply_1.4-0          GetoptLong_0.1.7       zoo_1.8-4             
##  [70] cluster_2.0.7-1        magrittr_1.5           data.table_1.12.0     
##  [73] circlize_0.4.5         lmtest_0.9-36          RANN_2.6.1            
##  [76] mvtnorm_1.0-8          fitdistrplus_1.0-14    hms_0.4.2             
##  [79] lsei_1.2-0             mime_0.6               evaluate_0.12         
##  [82] xtable_1.8-3           XML_3.98-1.17          mclust_5.4.2          
##  [85] sparsesvd_0.1-4        shape_1.4.4            HSMMSingleCell_1.2.0  
##  [88] compiler_3.5.1         tibble_2.0.1           KernSmooth_2.23-15    
##  [91] crayon_1.3.4           R.oo_1.22.0            htmltools_0.3.6       
##  [94] segmented_0.5-3.0      later_0.8.0            Formula_1.2-3         
##  [97] snow_0.4-3             geneplotter_1.60.0     tidyr_0.8.2           
## [100] DBI_1.0.0              MASS_7.3-51.1          fpc_2.1-11.1          
## [103] R.methodsS3_1.7.1      gdata_2.18.0           metap_1.1             
## [106] bindr_0.1.1            igraph_1.2.2           pkgconfig_2.0.2       
## [109] foreign_0.8-71         foreach_1.4.4          annotate_1.60.0       
## [112] XVector_0.22.0         bibtex_0.4.2           stringr_1.4.0         
## [115] digest_0.6.18          tsne_0.1-3             rmarkdown_1.11        
## [118] htmlTable_1.13.1       kernlab_0.9-27         gtools_3.8.1          
## [121] modeltools_0.2-22      rjson_0.2.20           nlme_3.1-137          
## [124] jsonlite_1.6           limma_3.38.3           pillar_1.3.1          
## [127] lattice_0.20-38        httr_1.4.0             DEoptimR_1.0-8        
## [130] survival_2.43-3        glue_1.3.0             qlcMatrix_0.9.7       
## [133] FNN_1.1.2.2            png_0.1-7              prabclus_2.2-7        
## [136] iterators_1.0.10       bit_1.1-14             class_7.3-15          
## [139] stringi_1.2.4          mixtools_1.1.0         blob_1.1.1            
## [142] doSNOW_1.0.16          latticeExtra_0.6-28    caTools_1.17.1.1      
## [145] memoise_1.1.0          ape_5.2
print(paste("Seurat ", packageVersion("Seurat")))
## [1] "Seurat  2.3.4"

Enrichment

enrichr_dbs <- c("KEGG_2018", "Reactome_2016",
                 "GO_Biological_Process_2018", "GO_Molecular_Function_2018", "GO_Cellular_Component_2018", 
                 "Rare_Diseases_AutoRIF_ARCHS4_Predictions", "Human_Gene_Atlas")
createDT(enrichR::listEnrichrDbs(), "Enrichr Databases")

Enrichr on Clusters

for (clust in unique(DAT.markers.sig$cluster)){
  cat('\n')
  cat("### Cluster ",clust,"{.tabset .tabset-fade}\n")
  geneList <- subset(DAT.markers.sig, cluster==clust)$gene  %>% as.character()
  results <- enrichr(genes = geneList, databases = enrichr_dbs )
  for (db in enrichr_dbs){
    cat('\n')
    cat("#### ",db,"\n")  
    createDT_html(subset(results[[db]], Adjusted.P.value<=0.05), paste("Enrichr Results: ",db,"Cluster ", clust))
    cat('\n')
  } 
  cat('\n')
} 
## Error in unique(DAT.markers.sig$cluster): object 'DAT.markers.sig' not found

Enrichr on WGCNA Modules

  • Background: Bulk RNA-seq was conducted on monocytes extracted from the blood of controls and PD patients. Katia Lopes conducted Weighted Correlation Network Analysis (WGCNA) on these sameples and identified co-expression modules.
  • Objective: Determine whether any of these modules are representative of cell groups in our scRNA-seq monocytes data.
eigengenes <- read.delim(file.path(root,"Data/bulkMonocytes_WGCNAmodules_geneMembership.txt"), row.names = NULL)
modules <- read.delim(file.path(root,"Data/bulkMonocytes_WGCNAmodules_geneModules.txt"), row.names = NULL, sep = "", 
                      col.names = c("Ensembl","moduleColors")) 
modules <- base::merge(eigengenes, modules,by="Ensembl" )

for (mod in unique(modules$moduleColors)){
  cat('\n')
  cat("### Module ",mod,"{.tabset .tabset-fade}\n")
  geneList <- subset(modules, moduleColors==mod)$symbol %>% as.character()
  results <- enrichr(genes = geneList, databases = enrichr_dbs )
  for (db in enrichr_dbs){
    cat('\n')
    cat("#### ",db,"\n")  
    createDT_html(subset(results[[db]], Adjusted.P.value<=0.05), paste("Enrichr Results:",db,"Module", mod))
    cat('\n')
  } 
  cat('\n')
}

Module blue

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

Human_Gene_Atlas

Module red

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

Human_Gene_Atlas

Module turquoise

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html

Human_Gene_Atlas

Module green

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

Human_Gene_Atlas

Module pink

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

Human_Gene_Atlas

Module black

Uploading data to Enrichr… Done. Querying KEGG_2018… Done. Querying Reactome_2016… Done. Querying GO_Biological_Process_2018… Done. Querying GO_Molecular_Function_2018… Done. Querying GO_Cellular_Component_2018… Done. Querying Rare_Diseases_AutoRIF_ARCHS4_Predictions… Done. Querying Human_Gene_Atlas… Done. Parsing results… Done.

KEGG_2018

Reactome_2016

GO_Biological_Process_2018

GO_Molecular_Function_2018

GO_Cellular_Component_2018

Rare_Diseases_AutoRIF_ARCHS4_Predictions

Human_Gene_Atlas

WGCNA Eigengenes

Determine whether each of the clusters in scRNA-seq data are enriched for WGCNA eigengenes (a weighted vector of all genes representing each co-expression module).

https://ucdavis-bioinformatics-training.github.io/2017_2018-single-cell-RNA-sequencing-Workshop-UCD_UCB_UCSF/day3/scRNA_Workshop-PART6.html

#Get the average expression of every gene in each cluster
allGenes <- get_markerDF(DAT, markerList = row.names(DAT@scale.data), meta_vars = c("post_clustering", "barcode") )

clusterGeneAvg <- allGenes %>% group_by(post_clustering, Gene) %>% summarise(meanExp = mean(Expression))
eigengenes_filt <-  subset(eigengenes,symbol %in%  unique(clusterGeneAvg$Gene))


clusts_by_mods <- base::merge(clusterGeneAvg[c("Gene","meanExp")], eigengenes_filt[c("symbol", modName)], 
                              by.x="Gene", by.y="symbol")


cor.test()
corrplot()
heatmap.2
 

f <- function(module){
  eigengene <-  eigengenes[paste0("MM", mod)]
  means <- tapply(eigengenes, DAT@meta.data$post_clustering, mean, na.rm = T)
  return(means)
}
modules <- c("blue", "brown", "green", "turquoise", "yellow")
plotdat <- sapply(modules, f)
matplot(plotdat, col = modules, type = "l", lwd = 2, xaxt = "n", xlab = "Seurat Cluster",
        ylab = "WGCNA Module Eigengene")
axis(1, at = 1:16, labels = 0:15)
matpoints(plotdat, col = modules, pch = 21)

RRHO

library(RRHO) #BiocManager::install("RRHO")

# list.length <- 100
#  list.names <- paste('Gene',1:list.length, sep='')
# gene.list1<- data.frame(list.names, sample(100))
# gene.list2<- data.frame(list.names, sample(100))


for (clust in unique(DAT.markers.sig)){
  # Compare each cluster
  subClust <- subset(DAT.markers.sig, cluster==clust)  %>% arrange(desc(avg_logFC))
  
  for (mod in unique(modules$moduleColors)){ 
    # Sort genes by module membership
    modName <-paste("MM",mod,sep="")
    subMod <- subset(modules, moduleColors==mod) %>% arrange(desc(eval(parse(text = modName))))
    maxGenes <- min(length(subClust$gene), subMod$symbol) %>% as.numeric()
    
    list1 <- subClust[1:maxGenes, c("gene","FC")] %>% dplyr::rename(value=FC)
    list2 <- subMod[1:maxGenes, c("symbol",modName)] %>% dplyr::rename(gene=symbol, value=modName)
    
    RRHO_path <-file.path("RRHO_results",paste(paste("Cluster",clust,sep=""),"vs",modName,sep="_"))
    dir.create(RRHO_path,recursive = T, showWarnings = F)
    
    RRHO_results <- RRHO(list1=list1, list2=list2,
         labels = c(paste("Cluster",clust,sep="_"), paste("Module",mod,sep="_")), 
         plots = T, alternative = "enrichment", outputdir = RRHO_path, BY=TRUE
         )
    lattice::levelplot(RRHO_results$hypermat) 
    # Pval testing
    pval.testing <- pvalRRHO(RRHO_results, 50)
    pval.testing$pval
    xs<- seq(0, 10, length=100)
    plot(Vectorize(pval.testing$FUN.ecdf)(xs)~xs, xlab='-log(pvalue)', ylab='ECDF', type='S')
    lattice::levelplot(RRHO_results$hypermat.by)
  } 
} 

Save Results

save.image(file.path(resultsPath, "scRNAseq_results.RData"))